# Path to the base configuration this file builds on.
# NOTE(review): presumably the loader reads that file first and the keys
# below override it — confirm against the config loader.
_base_: ./finetune_gpt_base.yaml

# Batch-size settings.
Global:
  # Explicitly null (the value was previously left empty, which also loads
  # as null).
  # NOTE(review): presumably filled in by the loader from the other batch
  # sizes and the parallel degrees — confirm.
  global_batch_size: null
  local_batch_size: 32
  micro_batch_size: 32
  

# Training-loop settings: epoch-based run, logging/eval frequency,
# mixed-precision options and checkpoint locations.
Engine:
  run_mode: epoch
  num_train_epochs: 3
  # Explicitly null (equivalent to an empty value).
  # NOTE(review): presumably derived by the loader when unset — confirm.
  accumulate_steps: null
  # NOTE(review): the unit of the two frequencies (steps vs. epochs) is not
  # stated in this file — confirm against the engine.
  logging_freq: 10
  eval_freq: 1
  mix_precision:
    enable: true
    scale_loss: 32768.0
    # Operator names listed for the mixed-precision black list.
    custom_black_list:
      - 'reduce_sum'
      - 'c_softmax_with_cross_entropy'
      - 'elementwise_div'
      - 'reduce_mean'
    # Operator names listed for the mixed-precision white list.
    custom_white_list:
      - 'lookup_table'
      - 'lookup_table_v2'
  save_load:
    save_epoch: 1
    output_dir: ./output
    # No checkpoint directory is configured here (null).
    ckpt_dir: null


# Model settings for the GPT fine-tuning module: a 2-class head on top of
# pretrained weights, followed by the architecture sizes, loss and metric.
Model:
  module: 'GPTFinetuneModule'
  name: 'GPT'
  num_classes: 2
  # Location of the pretrained weights to start from.
  pretrained: './ckpt/PaddleFleetX_GPT_345M_220826/model'
  fuse_attn_qkv: true
  fused_linear: false
  vocab_size: 50304
  hidden_size: 1024
  num_layers: 24
  num_attention_heads: 16
  ffn_hidden_size: 4096
  hidden_dropout_prob: 0.1
  attention_probs_dropout_prob: 0.1
  max_position_embeddings: 1024
  type_vocab_size: 16
  initializer_range: 0.02
  # Recompute is switched off, and no granularity is selected (null).
  use_recompute: false
  recompute_granularity: null

  # The same loss is named for both the train and the eval phase.
  loss:
    train:
      name: 'CrossEntropyLoss'
    eval:
      name: 'CrossEntropyLoss'

  # Metric named for the eval phase.
  metric:
    eval:
      name: 'Accuracy'
  

# Parallelism settings. Every degree below is 1 and all sharding options
# are switched off.
Distributed:
  dp_degree: 1
  mp_degree: 1
  pp_degree: 1
  sharding:
    sharding_degree: 1
    sharding_stage: 1
    sharding_offload: false
    reduce_overlap: false
    broadcast_overlap: false
    
# Optimizer and learning-rate schedule.
Optimizer:
  name: FusedAdamW
  weight_decay: 0.0
  beta1: 0.9
  beta2: 0.999
  # Written with an explicit decimal point on purpose: YAML 1.1 resolvers
  # (e.g. PyYAML) only recognise scientific notation as a float when the
  # mantissa contains a '.', so a bare `1e-6` would load as the string
  # '1e-6' instead of a number.
  epsilon: 1.0e-6
  multi_precision: true
  lr:
    name: LinearDecayWithWarmup
    # NOTE(review): 0.1 looks like a fraction of the total steps rather than
    # a step count — confirm against the scheduler.
    warmup: 0.1
    # Same float-notation rule as `epsilon` above: keep the decimal point.
    learning_rate: 2.0e-5
  tensor_fusion: false
    
    
# Dataset, sampler and loader settings for the train and eval phases.
# Both phases read SST2 from the same root and differ in split, shuffling
# and drop_last.
Data:
  # Training data: 'train' split, shuffled, incomplete last batch dropped.
  Train:
    dataset:
      name: SST2
      root: ./dataset/SST-2/
      split: 'train'
      max_length: 128
    sampler:
      name: DistributedBatchSampler
      batch_size: 32
      shuffle: true
      drop_last: true
    loader:
      num_workers: 4
      return_list: false

  # Evaluation data: 'dev' split, fixed order, last batch kept.
  Eval:
    dataset:
      name: SST2
      root: ./dataset/SST-2/
      split: 'dev'
      max_length: 128
    sampler:
      name: DistributedBatchSampler
      batch_size: 32
      shuffle: false
      drop_last: false
    loader:
      num_workers: 4
      return_list: false
